Setup and Configuration



In [1]:

    
from cassandra.cqlengine import *
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.management import sync_table, drop_table
from cassandra.cqlengine.connection import setup
from cassandra.cqlengine.columns import *
from uuid import uuid1, uuid4
from random import randint

# Point cqlengine at the local node. The second argument names the "tutorial"
# keyspace (presumably the default keyspace for models -- confirm against the
# setup() documentation).
setup(["localhost"], "tutorial")

Any kwargs passed to setup() will be passed to Cluster(), so it's possible to use the policies from the native driver.



In [2]:

    
# Needs an IPython extension importable as `cql`; the recorded output below
# shows this cell failing with ImportError when that extension is missing.
%load_ext cql
# Start from a clean slate: drop the tutorial keyspace if present, then
# recreate it with SimpleStrategy and a replication factor of 1.
%cql drop keyspace if exists tutorial
%cql CREATE KEYSPACE tutorial WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
%keyspace tutorial









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-a41bd49095d4> in <module>()
----> 1 get_ipython().magic(u'load_ext cql')
      2 get_ipython().magic(u'cql drop keyspace if exists tutorial')
      3 get_ipython().magic(u"cql CREATE KEYSPACE tutorial WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};")
      4 get_ipython().magic(u'keyspace tutorial')

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
   2203         magic_name, _, magic_arg_s = arg_s.partition(' ')
   2204         magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2205         return self.run_line_magic(magic_name, magic_arg_s)
   2206 
   2207     #-------------------------------------------------------------------------

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
   2124                 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
   2125             with self.builtin_trap:
-> 2126                 result = fn(*args,**kwargs)
   2127             return result
   2128 

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/IPython/core/magics/extension.pyc in load_ext(self, module_str)

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
    194 
    195         if callable(arg):

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/IPython/core/magics/extension.pyc in load_ext(self, module_str)
     61         if not module_str:
     62             raise UsageError('Missing module name.')
---> 63         res = self.shell.extension_manager.load_extension(module_str)
     64 
     65         if res == 'already loaded':

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/IPython/core/extensions.pyc in load_extension(self, module_str)
     96             if module_str not in sys.modules:
     97                 with prepended_to_syspath(self.ipython_extension_dir):
---> 98                     __import__(module_str)
     99             mod = sys.modules[module_str]
    100             if self._call_load_ipython_extension(mod):

ImportError: No module named cql

Defining a Model

Models are the core of cqlengine. We define a model much as we would with other object mappers. We can specify:

primary keys
clustering keys
default values
clustering sorting order
indexes
force a field to be required
create a field as static (shared by all rows)



In [3]:

    
class SensorData(Model):
    """One reading reported by a sensor, keyed by sensor and creation time."""
    # First primary-key column (by cqlengine convention the partition key --
    # confirm against the Models documentation).
    sensor_id = UUID(primary_key=True)
    # Second primary-key column; uuid1 is called to fill it in when the
    # caller does not supply a value.
    created_at = TimeUUID(primary_key=True, default=uuid1)
    # The measured value.
    reading = Integer()

Table Management - Creating & Dropping



In [4]:

    
# Drop first, then sync: the order matters, since syncing before dropping
# would leave no table behind.
drop_table(SensorData)
sync_table(SensorData)

Inserting Data



In [16]:

    
# A single sensor id shared by every reading written below; later cells reuse
# `sensor_id` to query and delete these rows.
sensor_id = uuid4()

for x in range(10):
    reading = randint(0, 10)
    # created_at is omitted, so the model's default (uuid1) supplies it.
    # `data` is left bound to the last row created; the update and delete
    # cells operate on it.
    data = SensorData.create(sensor_id=sensor_id, reading=reading)
    # Single-argument print(...) produces identical output on Python 2 and 3.
    print("New Reading %d: %d" % (x, reading))









    



New Reading 0: 1
New Reading 1: 9
New Reading 2: 8
New Reading 3: 7
New Reading 4: 4
New Reading 5: 10
New Reading 6: 4
New Reading 7: 6
New Reading 8: 0
New Reading 9: 9

Updating



In [6]:

    
# `data` is the last SensorData row created by the insert loop.
# Option 1: assign the attribute, then persist the change with save().
data.reading = 1
data.save()

# Option 2: pass the new value to update() directly.

data.update(reading=10)









    Out[6]:





SensorData <sensor_id=29716577-8e4b-4187-a38e-8c840ffd117e, created_at=b4abd56b-ccfa-11e4-96ac-74d4358a0878>

Deleting Data



In [7]:

    
# Delete the single row held in `data`.
data.delete()

# Or delete without loading anything first, filtering on the id alone.
# Only sensor_id is given here, so the filter is not restricted to one
# created_at value.

SensorData.objects(sensor_id=sensor_id).delete()

Collections



In [8]:

    
class CrazyCollections(Model):
    """Demonstrates the three collection column types: List, Map and Set."""
    id = Integer(primary_key=True)
    # List of Integer values.
    int_list = List(Integer)
    # Map with Text keys and Float values.
    float_map = Map(Text, Float)
    # Set of Text values.
    text_set = Set(Text)

# Recreate the table so the example always starts from an empty table.
drop_table(CrazyCollections)
sync_table(CrazyCollections)

# Plain Python containers are passed straight in for the collection columns.
obj = CrazyCollections.create(id=1,
                              int_list=[1, 2, 3],
                              float_map={"bacon": 1.1, "eggs": 2.0},
                              text_set={"apple", "pie"})

# Single-argument print(...) runs on Python 2 and 3. The double space matches
# what the original Python 2 `print "label: ", value` statement emitted.
print("The map:  %s" % (obj.float_map,))
print("The list:  %s" % (obj.int_list,))
print("The set:  %s" % (obj.text_set,))









    



The map:  {'eggs': 2.0, 'bacon': 1.1}
The list:  [1, 2, 3]
The set:  set(['apple', 'pie'])

Validation



In [9]:

    
class NoJonModel(Model):
    """Model with a custom validation rule: the name "Jon" is rejected."""
    id = UUID(primary_key=True)
    name = Text()
    
    def validate(self):
        """Run the base-class validation, then reject the name "Jon".

        Raises:
            ValidationError: if ``self.name`` equals "Jon".
        """
        super(NoJonModel, self).validate()
        if self.name == "Jon":
            # ValidationError is not imported by name in this notebook; it
            # presumably arrives via `from cassandra.cqlengine import *`.
            raise ValidationError("No Jon's allowed")

sync_table(NoJonModel)
# Intentionally fails: the recorded traceback shows create() -> save() ->
# validate() raising ValidationError.
NoJonModel.create(name="Jon")









    



---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
<ipython-input-9-e7a17383e02a> in <module>()
      9 
     10 sync_table(NoJonModel)
---> 11 NoJonModel.create(name="Jon")

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/cassandra_driver-2.1.4.post-py2.7-macosx-10.9-x86_64.egg/cassandra/cqlengine/models.pyc in create(cls, **kwargs)
    596         if extra_columns:
    597             raise ValidationError("Incorrect columns passed: {}".format(extra_columns))
--> 598         return cls.objects.create(**kwargs)
    599 
    600     @classmethod

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/cassandra_driver-2.1.4.post-py2.7-macosx-10.9-x86_64.egg/cassandra/cqlengine/query.pyc in create(self, **kwargs)
    713         return self.model(**kwargs).batch(self._batch).ttl(self._ttl).\
    714             consistency(self._consistency).if_not_exists(self._if_not_exists).\
--> 715             timestamp(self._timestamp).save()
    716 
    717     def delete(self):

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/cassandra_driver-2.1.4.post-py2.7-macosx-10.9-x86_64.egg/cassandra/cqlengine/models.pyc in save(self)
    653                 setattr(self, self._discriminator_column_name, self.__discriminator_value__)
    654 
--> 655         self.validate()
    656         self.__dmlquery__(self.__class__, self,
    657                           batch=self._batch,

<ipython-input-9-e7a17383e02a> in validate(self)
      6         super(NoJonModel, self).validate()
      7         if self.name == "Jon":
----> 8             raise ValidationError("No Jon's allowed")
      9 
     10 sync_table(NoJonModel)

ValidationError: No Jon's allowed

TTL

TTLs are supported on all mutations:



In [10]:

    
# Create a row with a TTL of 60 (presumably seconds, as in Cassandra's
# USING TTL -- confirm against the cqlengine documentation).
SensorData.ttl(60).create(sensor_id=uuid4(), reading=5)









    Out[10]:





SensorData <sensor_id=5193c70c-522d-4bdc-b5a3-b7449f7f9c89, created_at=b7cbeed9-ccfa-11e4-a28c-74d4358a0878>

Querying

Single Row `get()`

The simplest way to get a single row out of the database is the get call. Use this when you are expecting a single result.



In [12]:

    
class User(Model):
    """A user identified by name, with an age."""
    # The only primary-key column, so lookups by name are possible.
    name = Text(primary_key=True)
    age = Integer()

# Recreate the table so the example always starts from an empty table.
drop_table(User)
sync_table(User)

User.create(name="Jon", age=33)

# get() returns the single matching row.
# Single-argument print(...) runs on Python 2 and 3.
print(User.get(name="Jon"))









    



User <name=Jon>

User FAIL: If the row does not exist, or multiple come back, an exception will be thrown:



In [13]:

    
# Intentionally fails: no "Pete" row was created, and the recorded traceback
# shows get() raising DoesNotExist.
# Single-argument print(...) runs on Python 2 and 3.
print(User.get(name="Pete"))









    



---------------------------------------------------------------------------
DoesNotExist                              Traceback (most recent call last)
<ipython-input-13-5eee9c1070c1> in <module>()
----> 1 print User.get(name="Pete")

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/cassandra_driver-2.1.4.post-py2.7-macosx-10.9-x86_64.egg/cassandra/cqlengine/models.pyc in get(cls, *args, **kwargs)
    623         This is a pass-through to the model objects().:method:`~cqlengine.queries.get`.
    624         """
--> 625         return cls.objects.get(*args, **kwargs)
    626 
    627     def timeout(self, timeout):

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/cassandra_driver-2.1.4.post-py2.7-macosx-10.9-x86_64.egg/cassandra/cqlengine/query.pyc in get(self, *args, **kwargs)
    575         """
    576         if args or kwargs:
--> 577             return self.filter(*args, **kwargs).get()
    578 
    579         self._execute_query()

/Users/jhaddad/.virtualenvs/python-presentation/lib/python2.7/site-packages/cassandra_driver-2.1.4.post-py2.7-macosx-10.9-x86_64.egg/cassandra/cqlengine/query.pyc in get(self, *args, **kwargs)
    579         self._execute_query()
    580         if len(self._result_cache) == 0:
--> 581             raise self.model.DoesNotExist
    582         elif len(self._result_cache) > 1:
    583             raise self.model.MultipleObjectsReturned('{} objects found'.format(len(self._result_cache)))

DoesNotExist:

Multiple Rows

To pull back multiple rows, we'll use a QuerySet. QuerySets are chainable and lazily evaluated on access. Simply calling .objects returns a QuerySet which has not been evaluated.



In [17]:

    
# Building the queryset does not fetch rows; the recorded output is just the
# ModelQuerySet object's repr.
SensorData.objects(sensor_id=sensor_id)









    Out[17]:





<cassandra.cqlengine.query.ModelQuerySet at 0x10c32fe50>

By iterating over the queryset we tell the query to execute:



In [15]:

    
# Iterating is what triggers execution; limit(2) caps the rows returned.
for s in SensorData.objects(sensor_id=sensor_id).limit(2):
    # Same output as the Python 2 `print s, s.reading`, but also valid on
    # Python 3.
    print("%s %s" % (s, s.reading))

Filtering

Usage of the <, > etc operators is limited to clustering keys.



In [18]:

    
class Automobile(Model):
    """A car model offered by a manufacturer in a given year."""
    # Three primary-key columns, declared in the order manufacturer, year,
    # model. By cqlengine convention the first is the partition key and the
    # rest are clustering keys -- confirm against the Models documentation.
    manufacturer = Text(primary_key=True)
    year = Integer(primary_key=True)
    model = Text(primary_key=True)
    price = Decimal()

# Recreate the table so the example always starts from an empty table.
drop_table(Automobile)
sync_table(Automobile)

Automobile.create(manufacturer="Tesla", year=2013, model="Model S", price=70000)
Automobile.create(manufacturer="Tesla", year=2014, model="Model S", price=70000)
Automobile.create(manufacturer="Tesla", year=2015, model="Model S", price=70000)
Automobile.create(manufacturer="Tesla", year=2015, model="Model X", price=50000)

# The `__gt` suffix asks for year > 2014; the recorded output contains only
# the two 2015 rows.
list(Automobile.objects(manufacturer="Tesla", year__gt=2014))









    Out[18]:





[Automobile <manufacturer=Tesla, year=2015, model=Model S>,
 Automobile <manufacturer=Tesla, year=2015, model=Model X>]



In [ ]:

    
# Alternate syntax: the same year > 2014 restriction, written as a Python
# comparison on the model's column attribute and passed to filter().

list(Automobile.objects(manufacturer="Tesla").filter(Automobile.year > 2014))

Ordering

It's possible to order the results of a queryset, as long as the column you order by is a clustering key.



In [ ]:

    
class OrderedTable(Model):
    """Rows that share one id and are distinguished by `ck`, the column used for ordering."""
    id = Integer(primary_key=True)
    # Second primary-key column; this is the column passed to order_by below.
    ck = Integer(primary_key=True)
    val = Integer()

# Recreate the table so the example always starts from an empty table.
drop_table(OrderedTable)
sync_table(OrderedTable)

# Ten rows under the same id, with ck running from 0 to 9.
for x in range(10):
    OrderedTable.create(id=1, ck=x, val=x)

forward = [row.ck for row in OrderedTable.objects(id=1)]
# A leading "-" on the column name requests descending order.
backward = [row.ck for row in OrderedTable.objects(id=1).order_by("-ck")]

# Single-argument print(...) runs on Python 2 and 3. The double space matches
# what the original Python 2 `print "label: ", value` statement emitted.
print("Forward:  %s" % (forward,))
print("Reversed:  %s" % (backward,))

Flattening Results

If we're pulling back a big list of data, we frequently need to flatten it into a list of tuples.



In [ ]:

    
# Ask for just the two named columns; per the text above, each row comes back
# as a tuple rather than a model instance.
list(SensorData.objects().values_list('reading', 'created_at'))

Or just a list:



In [ ]:

    
# With a single column and flat=True the result is a plain list of values.
list(SensorData.objects().values_list('reading', flat=True))

Batches

Batches are useful in situations where you need to know that all or none of the queries get executed. By default a batch is logged, meaning even if the mutation is only partially applied, eventually the whole thing will be. This is useful when 2 tables absolutely have to be updated (such as some data and an index)



In [ ]:

    
# Import BatchQuery from the driver's bundled cqlengine, the same package the
# rest of this notebook uses, rather than from the legacy standalone
# `cqlengine` distribution.
from cassandra.cqlengine.query import BatchQuery

# Recreate the table so only the batched rows are present afterwards.
drop_table(SensorData)
sync_table(SensorData)

# Creates issued through .batch(b) are attached to the batch `b`; they are
# presumably sent together when the `with` block exits -- confirm against the
# BatchQuery documentation.
with BatchQuery() as b:
    for x in range(10):
        reading = randint(0, 10)
        # `sensor_id` is the id generated in the "Inserting Data" cell.
        SensorData.batch(b).create(sensor_id=sensor_id, reading=reading)

Lightweight Transactions



In [ ]:

    
# Conditional insert: if_not_exists() presumably maps to CQL's
# INSERT ... IF NOT EXISTS -- confirm against the cqlengine documentation.
# NOTE(review): an earlier cell already created a "Jon" row in the user table.
jon = User.if_not_exists().create(name="Jon", age=100)



In [ ]:

    
# Conditional update: iff(age=100) presumably applies the update only while
# the stored age is still 100 -- confirm against the cqlengine documentation.
jon.iff(age=100).update(age=101)

Table Properties



In [ ]:

    
from cassandra.cqlengine import LeveledCompactionStrategy

# NOTE: this redefines `User`, shadowing the earlier name-keyed model of the
# same name. From here on, `User` refers to this class.
class User(Model):
    """User model that also sets table-level options through dunder attributes."""
    # Table options, set as class attributes.
    __compaction__ = LeveledCompactionStrategy
    __gc_grace_seconds__ = 10000
    __read_repair_chance__  = 0.5
    user_id = UUID(primary_key=True)
    name = Text()

# The primary key differs from the earlier User model (user_id instead of
# name), so the table is dropped before being synced again.
drop_table(User)
sync_table(User)

Table Inheritance

Sometimes it's useful to have tables inherit from other tables. As a contrived example, what if we were to model tweets? Using table inheritance we can create 2 tables that inherit from a common parent.



In [ ]:

    
from datetime import datetime

class BaseTweet(Model):
    """Shared column definitions for the tweet models; marked abstract."""
    # Abstract base: only the subclasses are synced to tables below.
    __abstract__ = True
    tweet_id = Integer(primary_key=True)
    text = Text()
    # datetime.now is called to fill this in when no value is supplied.
    created_at = DateTime(default=datetime.now)
    
class StandardTweet(BaseTweet):
    """A regular tweet; adds nothing beyond the inherited columns."""
    pass

class PromotedTweet(BaseTweet):
    """A promoted tweet; adds a budget column to the inherited ones."""
    budget = Decimal()    

# One sync per concrete model, each of which gets its own table.
sync_table(StandardTweet)
sync_table(PromotedTweet)



In [ ]:

    
# %tables is not defined in this notebook; presumably it is provided by the
# cql extension loaded during setup and lists the keyspace's tables.
%tables

Each of the different tweet types lives in its own table, but the structure is inherited from the base table.

Table Polymorphism

The above example will create 2 tables, one for each type of tweet. But what if we want to do a single query that gets both results? There's no UNION built into CQL. For this, we use table polymorphism.



In [ ]:

    
from datetime import datetime

class Tweet(Model):
    """Base model for tweets stored in one table and told apart by a discriminator column."""
    __table_name__ = 'tweet'
    # Names the column that holds each subclass's __discriminator_value__.
    __discriminator_column__ = 'ad_type'
    tweet_id = Integer(primary_key=True)
    text = Text()
    # datetime.now is called to fill this in when no value is supplied.
    created_at = DateTime(default=datetime.now)
    # The discriminator column itself, declared with index=True.
    ad_type = Text(index=True)
    
# NOTE: StandardTweet and PromotedTweet redefine (shadow) the classes of the
# same names from the table-inheritance example.
class StandardTweet(Tweet):
     __discriminator_value__  = 'standard'

class PromotedTweet(Tweet):
    __discriminator_value__  = 'promoted'
    budget = Decimal()  
    
# Drop the shared table, then sync both subclasses.
drop_table(Tweet)    
sync_table(StandardTweet)
sync_table(PromotedTweet)



In [ ]:

    
# %tables is not defined in this notebook; presumably it is provided by the
# cql extension loaded during setup and lists the keyspace's tables.
%tables



In [ ]:

    
import pprint
# Shared pretty-printer, indenting nested structures by 4 spaces; used below
# to display query results.
pp = pprint.PrettyPrinter(indent=4)



In [ ]:

    
# Two standard tweets (ids 0 and 1) and two promoted tweets (ids 2 and 3).
for x in range(2):
    StandardTweet.create(tweet_id=x, text="standard")
for x in range(2, 4):
    PromotedTweet.create(tweet_id=x, text="promoted")

# Query through the base model; slicing with [:] evaluates the queryset.
Tweet.objects()[:]



In [ ]:

    
# Query through each subclass in turn and pretty-print the results.
# Single-argument print(...) runs on Python 2 and 3 with identical output.
print("standard tweets: ")
pp.pprint(StandardTweet.objects()[:])
print("promoted tweets: ")
pp.pprint(PromotedTweet.objects()[:])

Sphinx documentation

Django integration.



In [ ]: